package com.lyy.cloud.utils;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based helpers for stripping or neutralising HTML markup in strings.
 *
 * <p>All methods are static and keep no mutable state; the compiled patterns are
 * immutable and shared between calls.
 *
 * <p>NOTE(review): regex stripping is a text-extraction aid. It should not be relied
 * on as an XSS sanitizer for untrusted markup.
 *
 * @author liushiwei
 */
@Component
@Slf4j
public class HTMLTagUtil {
    /**
     * Matches an element whose tag name starts with {@code w}, up to the first closing
     * tag that also starts with {@code w}. Presumably aimed at Word-exported
     * {@code <w:...>} markup - confirm with the callers.
     */
    private static final Pattern W_TAG_PATTERN =
            Pattern.compile("<w[^>]*?>[\\s\\S]*?<\\/w[^>]*?>", Pattern.CASE_INSENSITIVE);
    /**
     * Matches a {@code <script>} element together with its content.
     */
    private static final Pattern SCRIPT_PATTERN =
            Pattern.compile("<script[^>]*?>[\\s\\S]*?<\\/script>", Pattern.CASE_INSENSITIVE);
    /**
     * Matches a {@code <style>} element together with its content.
     */
    private static final Pattern STYLE_PATTERN =
            Pattern.compile("<style[^>]*?>[\\s\\S]*?<\\/style>", Pattern.CASE_INSENSITIVE);
    /**
     * Matches any remaining tag, i.e. everything from a {@code <} to the next {@code >}.
     */
    private static final Pattern HTML_TAG_PATTERN = Pattern.compile("<[^>]+>");
    /**
     * Matches a run of whitespace (space, tab, CR, LF, form feed, vertical tab).
     */
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
    /**
     * Matches a {@code <} that is followed by a character other than {@code /} or an
     * ASCII letter. The look-ahead keeps the following character out of the match so
     * that consecutive {@code <} characters are each matched.
     */
    private static final Pattern STRAY_LT_PATTERN = Pattern.compile("<(?=[^/A-Za-z])");
    /**
     * Matches an opening {@code <a>} tag with or without attributes. The mandatory
     * whitespace before the attributes keeps tags such as {@code <abbr>} untouched.
     */
    private static final Pattern A_OPEN_PATTERN =
            Pattern.compile("<a(?:\\s[^>]*)?>", Pattern.CASE_INSENSITIVE);
    /**
     * Matches a closing {@code </a>} tag.
     */
    private static final Pattern A_CLOSE_PATTERN =
            Pattern.compile("</a\\s*>", Pattern.CASE_INSENSITIVE);


    /**
     * Removes markup from an HTML fragment and returns the bare text.
     *
     * <p>The passes run in this order: {@code w}-prefixed elements, {@code <script>}
     * elements, {@code <style>} elements (each including their content), all remaining
     * tags, and finally every whitespace character. Words separated only by
     * whitespace are therefore joined together in the result.
     *
     * @param htmlStr the HTML text to clean; may be {@code null}
     * @return the text without tags and whitespace, or {@code htmlStr} itself when it
     *         is {@code null} or empty
     */
    public static String delHTMLTag(String htmlStr) {
        if (htmlStr == null || htmlStr.isEmpty()) {
            return htmlStr;
        }
        String text = W_TAG_PATTERN.matcher(htmlStr).replaceAll("");
        text = SCRIPT_PATTERN.matcher(text).replaceAll("");
        text = STYLE_PATTERN.matcher(text).replaceAll("");
        text = HTML_TAG_PATTERN.matcher(text).replaceAll("");
        // Removes every whitespace character, plain spaces included.
        text = WHITESPACE_PATTERN.matcher(text).replaceAll("");
        // trim() additionally drops leading/trailing control characters (<= U+0020)
        // that the whitespace pattern does not cover.
        return text.trim();
    }

    /**
     * Escapes every {@code <} that is followed by a character other than {@code /} or
     * an ASCII letter as {@code &lt;}, leaving tag-like sequences such as
     * {@code <div>} and {@code </div>} untouched.
     *
     * <p>A {@code <} that is the very last character of the input is left as is.
     *
     * @param content the text to process; may be {@code null}
     * @return the text with stray {@code <} characters escaped, or {@code content}
     *         itself when it is {@code null} or empty
     */
    public static String replaceBrackets(String content) {
        if (content == null || content.isEmpty()) {
            return content;
        }
        return STRAY_LT_PATTERN.matcher(content).replaceAll("&lt;");
    }


    /**
     * Removes {@code <a>} tags while keeping the text they enclose.
     *
     * <p>Opening tags are matched case-insensitively and regardless of which
     * attributes they carry, so that no opening tag is left behind once its closing
     * {@code </a>} has been removed.
     *
     * @param content the text to process; may be {@code null}
     * @return the text without {@code <a>} tags, or {@code content} itself when it is
     *         blank
     */
    public static String delHtmlA(String content) {
        if (StringUtils.isBlank(content)) {
            return content;
        }
        String withoutOpeningTags = A_OPEN_PATTERN.matcher(content).replaceAll("");
        return A_CLOSE_PATTERN.matcher(withoutOpeningTags).replaceAll("");
    }
}
